# ------------------------------
# Create county indicators and weights for ACS data
# ------------------------------
# Read the processed ACS microdata from disk. It is loaded as a data.table
# because the steps further down modify it by reference with `:=`.
acs <- read_fst(
  path = file.path(processed_path, 'microACS_full_v1.fst'),
  as.data.table = TRUE
)

# -------
# Build the PUMA -> county crosswalk that supplies the geographic weights.
# skip = 1 makes fread ignore the first line of the CSV.
# NOTE(review): presumably the first line is a short-name header and the second
# line holds the descriptive labels referenced below -- confirm against the file.
county_puma <- fread(
  file.path(rawdata_path, 'georelated', 'geocorr2014_puma2000_to_county.csv'),
  skip = 1
)

# Turn the column labels into plain identifiers: drop parentheses first, then
# replace spaces with underscores (e.g. the label behind `PUMA_2000`).
names(county_puma) <- gsub(' ', '_', gsub('\\(|\\)', '', names(county_puma)))

# Zero-pad the geographic codes to fixed widths. The three new columns only
# read the raw code columns, so they can be created in a single `:=` call.
county_puma[, `:=`(
  county = str_pad(County_code, width = 5, pad = '0'),
  state  = str_pad(State_code, width = 2, pad = '0'),
  PUMA   = str_pad(PUMA_2000, width = 5, pad = '0')
)]

# `puma` (padded state code followed by padded PUMA code) is the merge key;
# `weight` is a copy of the PUMA-to-county allocation factor column.
county_puma[, `:=`(
  puma   = paste0(state, PUMA),
  weight = puma2k_to_county_allocation_factor
)]

#------------------------------------------------------------------------------
#  ACS
# Build the same state+PUMA key on the ACS side so it can be matched to the
# crosswalk: state code padded to 2 characters, PUMA code padded to 5.
acs[, State := str_pad(State, width = 2, pad = '0')]
acs[, Puma := str_pad(Puma, width = 5, pad = '0')]
acs[, puma := paste0(State, Puma)]

# Household and person numbers are stored as numeric (double).
acs[, HseNo := as.numeric(HseNo)]
acs[, PerNo := as.numeric(PerNo)]

# Person identifier: <Year>_<7-character household no.>_<2-character person no.>.
# The numbers are rendered with sprintf('%.0f') before padding because str_pad()
# coerces doubles with as.character(), which prints round values in scientific
# notation (100000 -> "1e+05") and would yield ids such as "001e+05".
# NOTE(review): a missing HseNo/PerNo now shows up as a zero-padded "NA" inside
# the uid instead of a bare "NA"; if other scripts build uid with the old
# str_pad-on-numeric pattern, update them together so the ids keep matching.
acs[, uid := paste0(
  Year, '_',
  str_pad(sprintf('%.0f', HseNo), width = 7, pad = '0'), '_',
  str_pad(sprintf('%.0f', PerNo), width = 2, pad = '0')
)]

# Constant column DSID, set to 0 for every ACS record.
acs[, DSID := 0]

#==============================================================================
# Attach counties to ACS records through the shared `puma` key. A PUMA can map
# to several counties and holds many ACS records, hence allow.cartesian = TRUE.
# all.x = TRUE keeps crosswalk rows with no ACS match; those rows have a missing
# Year and are removed right after the merge by the chained filter.
acs_county <- merge(
  x = county_puma[, .(puma, county, weight)],
  y = acs,
  by = 'puma',
  all.x = TRUE,
  allow.cartesian = TRUE
)[!is.na(Year)]

# County-level weight: the ACS weight column PerWgt scaled by the crosswalk weight.
acs_county[, county_weight := PerWgt * weight]

# Persist the county-matched ACS table; compress = 100 is passed explicitly
# (it was the third positional argument).
write_fst(
  x = acs_county,
  path = file.path(processed_path, 'acs_matched_county.fst'),
  compress = 100
)





